/*

This file makes table 2.

*/
global writeout 0 // change to 1 for writing out, 0 to not
global quietly quietly // comment out for noisy


$quietly { // top top quietly
clear
set more off
set seed 1337
set sortseed 1337

capture cd "~/Desktop/RESTAT_CODE"

global dir_root = "."
global dir_code = "${dir_root}/code"
global dir_data = "${dir_root}/data"
global dir_output "${dir_root}/output"

// Load the bootstrap data
quietly do "${dir_code}/1.1_cache-load_lasso_data.do"


/* This requires 2.1_cache_bootstrap_results.do to have been run, so those boostrap_results files exist. */
preserve
quietly ///
foreach cp in "c" "p" {
  forvalues cpi = 1/4 {
    use "${dir_output}/bootstrap/boostrap_results_`cp'-`cpi'", clear
    noi di "`cp'_`cpi'"
    foreach v in "lambda" "u_a" "u_b" "alpha_1" "alpha_0" {
      replace T_star`v' = abs(T_star`v')
      _pctile T_star`v', p(0.0000000001 95)

      scalar `v'_ci5_`cp'_`cpi' = r(r2) - r(r1)
      scalar `v'_ci95_`cp'_`cpi' = r(r2) - r(r1)
      noisily display "    `v': [`=`v'_ci5_`cp'_`cpi'' <--> `=`v'_ci95_`cp'_`cpi'']"
    }
  }
}
restore

/* Caches this dataset for rerunning speed. */
cap use "${dir_data}/table2_lasso_results.dta", clear
if _rc | $overwrite_lasso_dataset {
  capture generate cia_pos = DI
  capture generate tested_yn = Dtau
  eststo clear

  *create thresholds
    sum cia_pos
      scalar theta_base = r(mean)

  *lasso with symptoms 1 variables
    eststo: rlasso cia_pos `=w_c_1'
        scalar lasso1 = e(selected)
        predict DI_hat_c1 , xb
      generate est_smp_c_1 = DI_hat_c1 > theta_base if !missing(DI_hat_c1)
      /*-------------------------------------------------
               Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
                  csym9 |       0.0626358      0.1245344
                  _cons |*      0.0108785      0.0106007
      ---------------------------------------------------*/

  *lasso with symptom 1 and nonsymptom variables
    eststo: rlasso cia_pos `=w_c_2'
        scalar lasso2 = e(selected)
        predict DI_hat_c2, xb
        _pctile DI_hat_c2, p(97)
        scalar thetac2 = r(r1)
      generate est_smp_c_2 = DI_hat_c2 > thetac2 if !missing(DI_hat_c2)
      /*-------------------------------------------------
               Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
                  csym9 |       0.0357616      0.1032472
               nonvar10 |       0.0327498      0.0550928
                  _cons |*      0.0092797      0.0079797
      ---------------------------------------------------*/

  *lasso with symptom 2 variables
    eststo: rlasso cia_pos `=w_p_1'
        scalar lasso3 = e(selected)
        predict DI_hat_p1, xb
      generate est_smp_p_1 = DI_hat_p1 > theta_base if !missing(DI_hat_p1)
      /*-------------------------------------------------
               Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
                 psym15 |       0.0267525      0.0682248
                  _cons |*      0.0108748      0.0104269
      ---------------------------------------------------*/

  *lasso with symptom 2 and nonsymptom variables
    eststo: rlasso cia_pos `=w_p_2'
        scalar lasso4 = e(selected)
        predict DI_hat_p2, xb
        _pctile DI_hat_p2, p(97)
        scalar thetap2 = r(r1)
      generate est_smp_p_2 = DI_hat_p2 > thetap2 if !missing(DI_hat_p2)
      /*-------------------------------------------------
              Selected |           Lasso   Post-est OLS
      -----------------+--------------------------------
                psym15 |       0.0050204      0.0504727
              nonvar10 |       0.0332255      0.0566407
                 _cons |*      0.0093693      0.0078205
      ---------------------------------------------------*/

  *******************************************************************************
  **RUN ANALYSIS ON THIRD ORDER
  *******************************************************************************
  *lasso with symptom 1 variables & third order
    eststo: rlasso cia_pos `=w_c_3'
        scalar lasso5 = e(selected)
        predict DI_hat_c3, xb
        _pctile DI_hat_c3, p(97)
        scalar thetac3 = r(r1)
      generate est_smp_c_3 = DI_hat_c3 > thetac3 if !missing(DI_hat_c3)
      /*-------------------------------------------------
               Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
                  csym9 |       0.0237423      0.0573605
                cc_9_10 |       0.5201267      0.9320388
              ccc_2_6_9 |       0.0309292      0.0970874
              ccc_2_8_9 |       0.0309292      0.0970874
                  _cons |*      0.0109300      0.0106007
      ---------------------------------------------------*/

  *lasso with symptom 1 variables and nonsymptoms & third order
    eststo: rlasso cia_pos `=w_c_4'
        scalar lasso6 = e(selected)
        predict DI_hat_c4, xb
        _pctile DI_hat_c4, p(97)
        scalar thetac4 = r(r1)
      generate est_smp_c_4 = DI_hat_c4 > thetac4 if !missing(DI_hat_c4)
      /*-------------------------------------------------
              Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
              nonvar10 |       0.0047070     -0.0053604
               cc_9_10 |       0.5103697      0.9922744
               cn_9_26 |       0.1549185      0.2647226
            ccn_2_6_21 |       0.1110389      0.3562815
            ccn_2_6_26 |       0.1549185      0.2647226
            ccn_3_7_10 |       0.0445639      0.1929509
            cnn_6_1_12 |       0.0070418      0.0993932
           cnn_6_12_29 |       0.0730151      0.1725925
          cnn_10_10_17 |       0.0071043      0.0283545
          cnn_10_10_21 |       0.0347216      0.1831101
           nnn_3_10_17 |       0.1198018      0.4109120
           nnn_4_10_14 |       0.2617854      0.2863352
           nnn_4_10_29 |       0.2636656      0.4717588
           nnn_5_10_28 |       0.3585716      0.5824153
           nnn_6_10_13 |       0.0320379      0.1891284
          nnn_10_14_17 |       0.0132344      0.0529942
          nnn_10_17_26 |       0.0256740      0.0637421
          nnn_10_17_28 |       0.0156526      0.0625481
          nnn_10_26_27 |       0.2343555      0.4613620
                 _cons |*      0.0093428      0.0077256
      ---------------------------------------------------*/

    *calculate nosymp
    forvalues i = 1(1)4 {
      gen noest_smp_c_`i' = 1 - est_smp_c_`i'
    }


  *lasso with symptom 2 variables & third order
    eststo: rlasso cia_pos `=w_p_3'
        scalar lasso7 = e(selected)
        predict DI_hat_p3, xb
        _pctile DI_hat_p3, p(97)
        scalar thetap3 = r(r1)
      generate est_smp_p_3 = DI_hat_p3 > thetap3 if !missing(DI_hat_p3)
      /*-------------------------------------------------
              Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
               pp_2_15 |       0.0171401      0.0612955
            ppp_1_2_15 |       0.0698467      0.1214031
             ppp_1_7_8 |       0.0015065      0.0560332
                 _cons |*      0.0108941      0.0101331
      ---------------------------------------------------*/

  *lasso with symptom 2 variables and nonsymptoms & third order
    eststo: rlasso cia_pos `=w_p_4'
        scalar lasso8 = e(selected)
        predict DI_hat_p4, xb
        _pctile DI_hat_p4, p(97)
        scalar thetap4 = r(r1)
      generate est_smp_p_4 = DI_hat_p4 > thetap4 if !missing(DI_hat_p4)
      /*-------------------------------------------------
              Selected |           Lasso   Post-est OLS
      ------------------+--------------------------------
               pn_2_10 |       0.0316655      0.0636698
               pn_15_8 |       0.0111079      0.0724013
            ppn_1_7_10 |       0.0420710      0.1079316
            ppn_2_3_10 |       0.0388330      0.0426360
            pnn_4_5_10 |       0.0901633      0.3857524
           nnn_3_10_14 |       0.1693512      0.3840901
           nnn_4_10_14 |       0.1413959      0.2075590
           nnn_4_10_26 |       0.1413959      0.2075590
           nnn_4_10_29 |       0.1801918      0.3384694
           nnn_5_10_21 |       0.3049911      0.6534605
           nnn_5_10_28 |       0.1988088      0.3394106
           nnn_6_10_13 |       0.0368607      0.1928711
           nnn_8_10_17 |       0.0129271      0.0253832
          nnn_10_12_21 |       0.0292733      0.2890652
          nnn_10_14_26 |       0.1864023      0.1955032
          nnn_10_17_26 |       0.0388166      0.1437055
          nnn_10_17_28 |       0.0281980      0.0860346
                 _cons |*      0.0092542      0.0071289
      ---------------------------------------------------*/

    *calculate nosymp
      forvalues i = 1(1)4 {
        gen noest_smp_p_`i' = 1 - est_smp_p_`i'
      }

  // describe to see labels and ID selected variables from above.
  describe csym9 nonvar10 psym15 cc_9_10 ccc_2_6_9 ccc_2_8_9 cn_9_26 ccn_2_6_21 ///
           ccn_2_6_26 ccn_3_7_10 cnn_6_1_12 cnn_6_12_29 cnn_10_10_17 cnn_10_10_21 ///
           nnn_3_10_17 nnn_4_10_14 nnn_4_10_29 nnn_5_10_28 nnn_6_10_13 ///
           nnn_10_14_17 nnn_10_17_26 nnn_10_17_28 nnn_10_26_27 pp_2_15 ///
           ppp_1_2_15 ppp_1_7_8 pn_2_10 pn_15_8 ppn_1_7_10 ppn_2_3_10 pnn_4_5_10 ///
           nnn_3_10_14 nnn_4_10_26 nnn_5_10_21 nnn_8_10_17 nnn_10_12_21 nnn_10_14_26
    /*
    Variable
    name               Variable label
    ------------------------------------------------------------------------
    csym9              New loss of taste or smell
    nonvar10           "Known Contact-Yes"
    psym15             Loss of smell or taste
    cc_9_10            New loss of taste or smell * Other Symptoms
    ccc_2_6_9          New or worsened cough * Muscle pain * New loss of taste or smell
    ccc_2_8_9          New or worsened cough * Sore throat * New loss of taste or smell
    cn_9_26            New loss of taste or smell * "Ethnicity-Hispanic"
    ccn_2_6_21         New or worsened cough * Muscle pain * "Race-Asian"
    ccn_2_6_26         New or worsened cough * Muscle pain * "Ethnicity-Hispanic"
    ccn_3_7_10         New or increased difficulty breathing * Headache * "Known Contact-Yes"
    cnn_6_1_12         Muscle pain * "Work-At Office" * "Health-Excelent"
    cnn_6_12_29        Muscle pain * "Health-Excelent" * "School-< College"
    cnn_10_10_17       Other Symptoms * "Known Contact-Yes" * "Sex-Male"
    cnn_10_10_21       Other Symptoms * "Known Contact-Yes" * "Race-Asian"
    nnn_3_10_17        "Work-Furloughed" * "Known Contact-Yes" * "Sex-Male"
    nnn_4_10_14        "Work-Unemployed" * "Known Contact-Yes" * "Health-Good"
    nnn_4_10_29        "Work-Unemployed" * "Known Contact-Yes" * "School-< College"
    nnn_5_10_28        "Work-Out Of Labour Force" * "Known Contact-Yes" * "School-High School"
    nnn_6_10_13        "Work-Retired" * "Known Contact-Yes" * "Health-Very Good"
    nnn_10_14_17       "Known Contact-Yes" * "Health-Good" * "Sex-Male"
    nnn_10_17_26       "Known Contact-Yes" * "Sex-Male" * "Ethnicity-Hispanic"
    nnn_10_17_28       "Known Contact-Yes" * "Sex-Male" * "School-High School"
    nnn_10_26_27       "Known Contact-Yes" * "Ethnicity-Hispanic" * "School-< High School"
    pp_2_15            Felt feverish * Loss of smell or taste
    ppp_1_2_15         Fever measured by thermometer * Felt feverish * Loss of smell or taste
    ppp_1_7_8          Fever measured by thermometer * Difficulty breathing * Muscle pain
    pn_2_10            Felt feverish * "Known Contact-Yes"
    pn_15_8            Loss of smell or taste * "Worked Outside"
    ppn_1_7_10         Fever measured by thermometer * Difficulty breathing * "Known Contact-Yes"
    ppn_2_3_10         Felt feverish * Chills * "Known Contact-Yes"
    pnn_4_5_10         Cough * "Work-Out Of Labour Force" * "Known Contact-Yes"
    nnn_3_10_14        "Work-Furloughed" * "Known Contact-Yes" * "Health-Good"
    nnn_4_10_26        "Work-Unemployed" * "Known Contact-Yes" * "Ethnicity-Hispanic"
    nnn_5_10_21        "Work-Out Of Labour Force" * "Known Contact-Yes" * "Race-Asian"
    nnn_8_10_17        "Worked Outside" * "Known Contact-Yes" * "Sex-Male"
    nnn_10_12_21       "Known Contact-Yes" * "Health-Excelent" * "Race-Asian"
    nnn_10_14_26       "Known Contact-Yes" * "Health-Good" * "Ethnicity-Hispanic"
    */
    /* label variables for esttab output */
    label variable csym9          "Anosmia"
    label variable nonvar10       "Known Contact"
    label variable psym15         "Anosmia"
    label variable cc_9_10        "Anosmia * Other Symptoms"
    label variable ccc_2_6_9      "Cough * Muscle pain * Anosmia"
    label variable ccc_2_8_9      "Cough * Sore throat * Anosmia"
    label variable cn_9_26        "Anosmia * Hispanic"
    label variable ccn_2_6_21     "Cough * Muscle pain * Asian"
    label variable ccn_2_6_26     "Cough * Muscle pain * Hispanic"
    label variable ccn_3_7_10     "Difficulty Breathing * Headache * Known Contact"
    label variable cnn_6_1_12     "Muscle pain * Work At Office * Excelent Health"
    label variable cnn_6_12_29    "Muscle pain * Excelent Health * Some College Education"
    label variable cnn_10_10_17   "Other Symptoms * Known Contact * Male"
    label variable cnn_10_10_21   "Other Symptoms * Known Contact * Asian"
    label variable nnn_3_10_17    "Furloughed * Known Contact * Male"
    label variable nnn_4_10_14    "Unemployed * Known Contact * Good Health"
    label variable nnn_4_10_29    "Unemployed * Known Contact * Some College Education"
    label variable nnn_5_10_28    "Out Of Labour Force * Known Contact * High School Graduate"
    label variable nnn_6_10_13    "Retired * Known Contact * Very Health Good"
    label variable nnn_10_14_17   "Known Contact * Good Health * Male"
    label variable nnn_10_17_26   "Known Contact * Male * Hispanic"
    label variable nnn_10_17_28   "Known Contact * Male * High School Graduate"
    label variable nnn_10_26_27   "Known Contact * Hispanic * Some High School Education"
    label variable pp_2_15        "Felt feverish * Anosmia"
    label variable ppp_1_2_15     "Fever Temperature * Felt feverish * Anosmia"
    label variable ppp_1_7_8      "Fever Temperature * Difficulty Breathing * Muscle pain"
    label variable pn_2_10        "Felt feverish * Known Contact"
    label variable pn_15_8        "Anosmia * Worked Outside"
    label variable ppn_1_7_10     "Fever Temperature * Difficulty Breathing * Known Contact"
    label variable ppn_2_3_10     "Felt feverish * Chills * Known Contact"
    label variable pnn_4_5_10     "Cough * Out Of Labour Force * Known Contact"
    label variable nnn_3_10_14    "Furloughed * Known Contact * Good Health"
    label variable nnn_4_10_26    "Unemployed * Known Contact * Hispanic"
    label variable nnn_5_10_21    "Out Of Labour Force * Known Contact * Asian"
    label variable nnn_8_10_17    "Worked Outside * Known Contact * Male"
    label variable nnn_10_12_21   "Known Contact * Excelent Health * Asian"
    label variable nnn_10_14_26   "Known Contact * Good Health * Hispanic"
    esttab, rename(psym15 csym9) order(cs* non* cc_* cn_* pp_*  pn_* c* p*) label
    eststo clear
    foreach i of numlist 1/8 {
      eststo: reg cia_pos `=lasso`i''
    }
    esttab, rename(psym15 csym9) order(cs* non* cc_* cn_* pp_*  pn_* c* p*) label
    esttab using "${dir_output}/lasso_regression_coefficients.tex", booktabs type ///
      rename(psym15 csym9) order(cs* non* cc_* cn_* pp_*  pn_* c* p*) label

  drop cc_* cn_* ccc_* ccn_* cnn_* pn_* pp_* ppp_* ppn_* pnn_* nn_* nnn_*

  save "${dir_data}/table2_lasso_results.dta", replace
} // end "${dir_data}/table2_lasso_results.dta"



******************************************************************************
 * Create statistics and table
******************************************************************************

  global extra_if ""
  /* global extra_if "& (phase1_inperson == 1)"
  global extra_if "& (phase2 == 1)" */

  svyset geoid_blkgroup [pweight = w_together_smooth], strata(stratum_phase1) || house_id

  noisily mean cia_pos [pw = w_together_smooth ]
  noisily mean pcr_pos [pw = w_together_smooth ] if (sampled_individual ==1)
  noisily mean pcr_pos [pw = w_together_smooth ] if (sampled_individual ==1) & phase1_inperson == 1
  noisily mean pcr_pos [pw = w_together_smooth ] if (sampled_individual ==1) & phase2 ==1
  noisily mean any_pos [pw = w_together_smooth ]

  mat table2_panelA = J(`=5 * 4', 4,.) // rows: 5 vars * 4 rows each (est, se, ci5, ci95), cols: 4
    mat colnames table2_panelA="Symptom" "Symptom and nonsymptom" "Symptom" "Symptom and nonsymptom"
    mat rownames table2_panelA="a_1-est" "a_1-se" "a_1-ci5" "a_1-ci95" ///
                               "a_0-est" "a_0-se" "a_0-ci5" "a_0-ci95" ///
                               "lambda-est" "lambda-se" "lambda-ci5" "lambda-ci95" ///
                               "mu_a-est" "mu_a-se" "mu_a-ci5" "mu_a-ci95" ///
                               "mu_b-est" "mu_b-se" "mu_b-ci5" "mu_b-ci95"

  quietly ///
  forvalues col = 1/4{
    local weight ""
    local weight "[iw = w_together_smooth]"
    local symp est_smp_p_`col'
    local symc est_smp_c_`col'

    preserve
      keep if (sampled_individual ==1) $extra_if

      sum `symc' `weight' if cia_pos == 1
        scalar alpha_1=r(mean)
        mat table2_panelA[1, `col'] = round(`=alpha_1' ,.0001) // alpha_1 estimate
        mat table2_panelA[2, `col'] = round(`=(alpha_1_ci5_c_`col' + alpha_1_ci95_c_`col')/2/1.96' ,.0001) // alpha_1 SE
        mat table2_panelA[3, `col'] = round(`=alpha_1 - alpha_1_ci5_c_`col'' ,.0001) // alpha_1 CI 5%
        mat table2_panelA[4, `col'] = round(`=alpha_1 + alpha_1_ci95_c_`col'' ,.0001) // alpha_1 CI 95%

      sum `symc' `weight' if cia_pos == 0
        scalar alpha_0=r(mean)
        mat table2_panelA[5, `col'] = round(`=alpha_0' ,.0001) // alpha_0 estimate
        mat table2_panelA[6, `col'] = round(`=(alpha_0_ci5_c_`col' + alpha_0_ci95_c_`col')/2/1.96' ,.0001) // alpha_0 SE
        mat table2_panelA[7, `col'] = round(`=alpha_0 - alpha_0_ci5_c_`col'' ,.0001) // alpha_0 CI 5%
        mat table2_panelA[8, `col'] = round(`=alpha_0 + alpha_0_ci95_c_`col'' ,.0001) // alpha_0 CI 95%

      scalar lambda = `=alpha_1 / alpha_0'
        mat table2_panelA[ 9, `col'] = round(`=lambda' ,.0001) // lambda estimate
        mat table2_panelA[10, `col'] = round(`=(lambda_ci5_c_`col' + lambda_ci95_c_`col')/2/1.96' ,.0001) // lambda SE
        mat table2_panelA[11, `col'] = round(`=lambda - lambda_ci5_c_`col'' ,.0001) // lambda CI 5%
        mat table2_panelA[12, `col'] = round(`=lambda + lambda_ci95_c_`col'' ,.0001) // lambda CI 95%

      sum `symp' `weight' if tested_yn == 1
        mat table2_panelA[13, `col'] = round(`=r(mean)' ,.0001) // mu_a estimate
        mat table2_panelA[14, `col'] = round(`=(u_a_ci5_p_`col' + u_a_ci95_p_`col')/2/1.96' ,.0001) // mu_a standard error
        mat table2_panelA[15, `col'] = round(`=r(mean) - u_a_ci5_p_`col'' ,.0001) // mu_a CI 5%
        mat table2_panelA[16, `col'] = round(`=r(mean) + u_a_ci95_p_`col'' ,.0001) // mu_a CI 95%

      *odds ratio -- mu_b
      sureg (cia_pos `symp', nocons) (tested_yn `symp', nocons) (p_it `symp', nocons)
        cap nlcom [cia_pos]`symp'*[tested_yn]`symp' / [p_it]`symp'
        mat temp3 = r(b)
        scalar stemp3 = temp3[1,1]
        mat table2_panelA[17, `col'] = round(`=stemp3' ,.0001) // tab B1 mu_b estimate
        mat table2_panelA[18, `col'] = round(`=(u_b_ci5_p_`col' + u_b_ci95_p_`col')/2/1.96' ,.0001) // mu_b SE
        mat table2_panelA[19, `col'] = round(`=stemp3 - u_b_ci5_p_`col'' ,.0001) // mu_b CI 5%
        mat table2_panelA[20, `col'] = round(`=stemp3 + u_b_ci95_p_`col'' ,.0001) // mu_b CI 95%

    restore
  }
  mat list table2_panelA

scalar row0="$\alpha^1$"
scalar row1="$\alpha^0$"
scalar row2="$\lambda$"
scalar row3="$\mu_a$"
scalar row4="$\mu_b$"

quietly foreach row of numlist 0/4 {
  // Row 1 - estimate
  noi di "`=row`row'' & ", _c
  foreach col of numlist 1/4 {
    noi di %4.3f table2_panelA[`row'*4 + 1,`col'] " & ", _c
  }
  noi di "\\"

  // Row 2 - Standard Error
  noi di " & ", _c
  foreach col of numlist 1/4 {
    noi di "(" %4.3f table2_panelA[`row'*4 + 2,`col'] ") & ", _c
  }
  noi di "\\"

  // Row 3 - CI
  noi di " & ", _c
  foreach col of numlist 1/4 {
    noi di "[" %4.3f max(0, table2_panelA[`row'*4 + 3,`col']) "," %4.3f table2_panelA[`row'*4 + 4,`col'] "] & ", _c
  }
  noi di "\\ \addlinespace"
}

} // end top top $quietlys
